## FIGURE 2 ###
library(gplots)
library(pheatmap)
library(ggplot2)
library(RColorBrewer)

# PANEL A
# Read the tab-separated co-occurrence table; its first column supplies the
# row names.
path <- paste0('./', 'matrix_coocurrences_fig3.tsv')
df <- read.csv(file = path, sep = '\t', row.names = 1)

# Label the second column "R-M" (presumably undoing the name sanitising that
# read.csv applies to headers by default -- TODO confirm against the file).
names(df)[2] <- "R-M"
matrix <- as.matrix(df)

system_names <- rownames(matrix)

# Annotation table used by the heatmap: one row per defense system (row names),
# with a single `mechanism` column giving its functional category.
categories <- data.frame(
  mechanism = c(
    rep("Innate", 4),
    "Adaptative",
    rep("Abortive", 9),
    rep("Unknown", 10)
  ),
  row.names = c(
    # Innate
    "Ssp", "R-M", "Dnd", "BREX",
    # Adaptative
    "Cas",
    # Abortive
    "AbiH", "RosmerTA", "SanaTA", "CapRel", "Rst_PARIS", "Retron", "PrrC",
    "DarTG", "CBASS",
    # Unknown
    "Gao_Qat", "Gabija", "Septu", "Shedu", "Lamassu-Fam", "Menshen", "Mokosh",
    "Druantia", "PD-T7-5", "PD-T4-5"
  )
)

# Calculate the significance of each co-occurrence.
#
# For every off-diagonal cell (i, j) of a count matrix a 2x2 table is built from
#   counts[i, j],
#   (total of row i) - counts[i, j],
#   (total of column j) - counts[i, j],
#   (grand total) - (total of row i) - (total of column j) + counts[i, j]
# and tested with Fisher's exact test.
#
# Args:
#   matrix: numeric matrix of counts (diagonal cells are included in the
#           totals but are not tested themselves).
#
# Returns:
#   A matrix with the same dimensions as the input holding the p-value of each
#   tested cell; cells with i == j are left as NA. Dimnames are not set.
#   An input with zero rows or columns yields an empty matrix.
calculate_signif <- function(matrix) {
  counts <- matrix
  n <- sum(counts)  # Total number of observations

  # Row/column totals are loop invariants, so compute them once
  row_totals <- unname(rowSums(counts))
  col_totals <- unname(colSums(counts))

  # Matrix in which the p-values are stored (base:: because the argument
  # shares its name with the constructor)
  p_values <- base::matrix(NA, nrow = nrow(counts), ncol = ncol(counts))

  # seq_len() keeps both loops empty for a zero-row / zero-column input
  for (i in seq_len(nrow(counts))) {
    for (j in seq_len(ncol(counts))) {
      if (i == j) {
        next  # Exclude main diagonal
      }

      both <- counts[i, j]
      contingency <- base::matrix(
        c(both,
          row_totals[i] - both,
          col_totals[j] - both,
          n - row_totals[i] - col_totals[j] + both),
        nrow = 2
      )

      # Store the p-value of Fisher's exact test for this pair
      p_values[i, j] <- fisher.test(contingency)$p.value
    }
  }

  p_values
}

# Assign a significance level to each co-occurrence

p_values_matrix <- calculate_signif(matrix) # Fisher p-value per system pair
rownames(p_values_matrix) <- system_names
colnames(p_values_matrix) <- system_names

# Significance stars shown inside the heatmap cells:
#   p < 0.001 -> '***', p < 0.01 -> '**', p < 0.05 -> '*', otherwise ''.
# Cells without a p-value (NA, i.e. the diagonal) keep the empty label.
# Thresholds are applied from weakest to strongest so the strongest one wins.
label_matrix <- array(
  "",
  dim = dim(p_values_matrix),
  dimnames = dimnames(p_values_matrix)
)
label_matrix[which(p_values_matrix < 0.05)] <- "*"
label_matrix[which(p_values_matrix < 0.01)] <- "**"
label_matrix[which(p_values_matrix < 0.001)] <- "***"


# Calculate relative freqs: every row i is divided by its own diagonal count
# matrix[i, i] (the vector of diagonal values is recycled down each column).
matrix <- matrix / diag(matrix)

# Remove the values of the main diagonal. They are masked by position rather
# than by magnitude, so a system whose own count is 1 or 0 is hidden as well.
diag(matrix) <- NA
# Also mask cells that are not usable as a relative frequency: non-finite
# results of a zero diagonal count and ratios above 1.
matrix[!is.finite(matrix) | matrix > 1] <- NA

### Draw the heatmap
colnames(matrix)[c(19, 23, 24)] <- c("Lamassu-Fam", "PD-T7-5", "PD-T4-5")
my_colors <- colorRampPalette(
  c("#FFFFFF", "lightsteelblue1", "#BCD2EE", "#8DB6CD", "#607B8B")
)(30)
# Prepend 18 white entries so the lower part of the scale is drawn in white
my_colors <- c(rep("#FFFFFF", 18), my_colors)

A <- pheatmap(
  matrix,
  cluster_rows = FALSE,
  cluster_cols = FALSE,
  display_numbers = label_matrix,
  fontsize_number = 13,
  annotation_col = categories,
  annotation_row = categories,
  annotation_colors = list(
    mechanism = c(
      "Innate" = "#AB82FF",
      "Adaptative" = "#7CCD7C",
      "Abortive" = "#EE6363",
      "Unknown" = "lightgoldenrod1"
    )
  ),
  color = my_colors,
  scale = "row",
  legend_breaks = c(-3, 3),
  legend_labels = c("No co-occur", "Co-occur"),
  fontsize_row = 13,
  fontsize_col = 13,
  na_col = "black"
)



# PANEL B
library(ggVennDiagram)
library(grid)

# Loading data - Panel B
# Headerless tab-separated table; the first column (strains) becomes the row
# names, leaving the columns sys, subsys and genes.
dfv <- read.csv(
  "./st_sys_subsys.tsv",
  sep = "\t",
  header = FALSE,
  col.names = c('strains', 'sys', 'subsys', 'genes'),
  row.names = 1
)

# Get IDs of the strains with each defense system type: the row names of the
# rows whose `sys` field matches the pattern.
ssp <- rownames(dfv)[grepl("SspBCDE", dfv$sys)]
rm <- rownames(dfv)[grepl("R-M", dfv$sys)]
brex <- rownames(dfv)[grepl("BREX", dfv$sys)]
dnd <- rownames(dfv)[grepl("Dnd", dfv$sys)]
cas <- rownames(dfv)[grepl("Cas", dfv$sys)]
gq <- rownames(dfv)[grepl("Gao_Qat", dfv$sys)]
pd.t4 <- rownames(dfv)[grepl("PD-T4-5", dfv$sys)]
pd.t7 <- rownames(dfv)[grepl("PD-T7-5", dfv$sys)]
rTA <- rownames(dfv)[grepl("RosmerTA", dfv$sys)]
gab <- rownames(dfv)[grepl("Gabija", dfv$sys)]

# Fill gradient shared by the three Venn diagrams
my_colors <- colorRampPalette(c("#FFFFFF", "#8DB6CD", "#607B8B"))(30)

# Four-set diagram: R-M, Ssp, BREX, Dnd
v_innate <- ggVennDiagram(
  x = list(rm, ssp, brex, dnd), color = "black", lwd = 0.8, lty = 1,
  label_size = 4.5, label_alpha = 0, set_size = 3.7,
  category.names = c("R-M", "Ssp", "BREX", "Dnd")
) +
  ggtitle("Innate systems") +
  scale_fill_gradientn(colors = my_colors) +
  scale_color_manual(values = rep("black", 4)) +
  theme(
    legend.position = "none",
    plot.title = element_text(size = 12, face = "bold", hjust = 0.5, vjust = 2)
  )


# Four-set diagram: PD-T4, Gao_Qat, Ssp, PD-T7
v_st2 <- ggVennDiagram(
  x = list(pd.t4, gq, ssp, pd.t7), color = "black", lwd = 0.8, lty = 1,
  label_size = 4.5, label_alpha = 0, set_size = 3.7,
  category.names = c("PD-T4", "Gao_Qat", "Ssp", "PD-T7")
) +
  ggtitle("Systems associated\n with SspBCDE") +
  scale_fill_gradientn(colors = my_colors) +
  scale_color_manual(values = rep("black", 4)) +
  theme(
    legend.position = "none",
    plot.title = element_text(size = 12, face = "bold", hjust = 0.5, vjust = 2)
  )

# Five-set diagram: Cas, R-M, Gabija, RosmerTA, Gao_Qat (labels as percentages).
# The manual colour scale gets one value per set, i.e. five here.
v_cas <- ggVennDiagram(
  x = list(cas, rm, gab, rTA, gq), color = "black", lwd = 0.8, lty = 1,
  label_size = 4.5, label_alpha = 0, set_size = 3.7, label = "percent",
  category.names = c("Cas", "R-M", "Gabija", "RosmerTA", "Gao_Qat")
) +
  ggtitle("Systems associated\n with Cas") +
  scale_fill_gradientn(colors = my_colors) +
  scale_color_manual(values = rep("black", 5)) +
  theme(
    legend.position = "none",
    plot.title = element_text(size = 12, face = "bold", hjust = 0.5, vjust = 2)
  )


# Join plots
library(cowplot)
library(ggplotify)

# Blank plots used as vertical spacers in the final layout
empty1 <- ggplot() + theme_minimal()
empty2 <- ggplot() + theme_minimal()

# Panel B: the three Venn diagrams side by side with equal widths
B <- plot_grid(
  v_innate, v_st2, v_cas,
  ncol = 3, nrow = 1, rel_widths = c(5, 5, 5),
  vjust = 0.9, hjust = -0.8, label_size = 5
)

# Convert the pheatmap result into a ggplot object so cowplot can arrange it
gpA <- as.ggplot(A)

# Final figure: spacer, heatmap (A), spacer, Venn row (B); the labels are
# attached to the spacer and heatmap slots in the order given.
fig3 <- plot_grid(
  empty1, gpA, empty2, B,
  labels = c("A", "", "B", ""),
  nrow = 4, rel_heights = c(0.1, 5, 0.3, 2.5)
)

pdf("fig3.pdf", height = 14, width = 12, paper = "special")
# Explicit print(): a bare `fig3` is only drawn by top-level auto-printing,
# which does not happen when this script is run through source().
print(fig3)
dev.off()
